package com.stu.pdftohtml.spire;

import org.apache.commons.io.FileUtils;
import org.junit.jupiter.api.Test;

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.List;

/**
 * Manual check that prints the contents of an extracted-text file with runs of blank lines
 * collapsed.
 *
 * <p>Created 2022/2/23 0:04.
 *
 * @author MI
 */
public class ReadText {
    /** Location of the text file to print; the test only works on a machine that has this file. */
    private static final String EXTRACTED_TEXT_PATH = "d:/ExtractText.txt";

    /**
     * Reads the text file as UTF-8, collapses blank lines and prints the result to standard output.
     *
     * <p>The file is read as a single string rather than line by line: line-oriented readers strip
     * the line terminators, so a pattern that looks for consecutive line breaks can never match
     * inside an individual line.
     *
     * @throws IOException if the file cannot be read
     */
    @Test
    public void readTxt() throws IOException {
        String content = FileUtils.readFileToString(new File(EXTRACTED_TEXT_PATH), StandardCharsets.UTF_8);
        System.out.println(collapseBlankLines(content));
    }

    /**
     * Collapses blank (empty or whitespace-only) lines in {@code text} and removes a line break at
     * the very start of it.
     *
     * @param text the full text, with its original line terminators
     * @return the text with each run of repeated line breaks reduced to a single line break
     */
    private static String collapseBlankLines(String text) {
        return text
                // A line break, optional whitespace, then the same kind of break again -> keep one break.
                .replaceAll("((\r\n)|\n)[\\s\t ]*(\\1)+", "$1")
                // Without MULTILINE, '^' only matches at the start of the input: drop a leading break.
                .replaceAll("^((\r\n)|\n)", "");
    }
}
